import requests;
from lxml import etree;

# Sample HTML document used to demonstrate XPath text extraction.
# The whitespace inside the literal is significant: it shows up in the
# text nodes returned by the queries below.
html = """
<html>
	<div>
		<ul>
			<li class="item-0">
				<a href="link1.html">first item</a>
			</li>
			<li class="item-1">
				<a href="link2.html">second item</a>
			</li>	
			<li class="item-active">
				<a href="link3.html">third item</a>
			</li>	
			<li class="item-1">
				<a href="link4.html">fourth item</a>
			</li>			
			<li class="item-0">
				<a href="link5.html">fifth item</a>
			</li>
			<li class="else-1">something else</li>		
			this is ul item
		</ul>				
	</div>
</html>
"""

# Parse the markup into an element tree that XPath queries can run against.
selector = etree.HTML(html)

# text() selects only the text nodes that are direct children of <ul>;
# text nested inside the <li>/<a> descendants is not included.
direct_text_query = "//ul/text()"
ul_text = selector.xpath(direct_text_query)
print(ul_text)

# string() returns the text of <ul> and all of its descendants,
# concatenated into a single string value.
full_text_query = "string(//ul)"
all_text = selector.xpath(full_text_query)
print(type(all_text), all_text, sep="\n")